# -*- coding: utf-8 -*-

import urllib2
from bs4 import BeautifulSoup

def describe_tag(tag):
    """Return a one-line "name href text" summary of a parsed tag.

    ``tag`` is the result of a ``soup.find(...)`` lookup. ``find`` returns
    ``None`` when nothing matches, so that case yields a placeholder string
    instead of raising ``AttributeError``. A tag without an ``href``
    attribute is summarised with an empty href instead of raising
    ``KeyError``.
    """
    if tag is None:
        return "tag not found"
    return "%s %s %s" % (tag.name, tag.get("href", ""), tag.get_text())


if __name__ == '__main__':
    # Fetch the page and parse it; close the HTTP response as soon as the
    # parser has consumed it so the socket is not leaked.
    response = urllib2.urlopen("http://www.baidu.com")
    try:
        soup = BeautifulSoup(response, 'html.parser')
    finally:
        response.close()

    # Single-argument print(...) behaves the same as the Python 2 print
    # statement, so the output format is unchanged.
    print(soup.title)

    # Collect every <a> tag in the document and dump each one.
    for link in soup.find_all("a"):
        print(link)
    print("------------------------")

    # Look up a single anchor by its id attribute.
    print(describe_tag(soup.find('a', id='jgwab')))

    # "class" is a Python keyword, so the keyword argument carries a
    # trailing underscore: class_.
    print(describe_tag(soup.find('a', class_='mnav')))
    print("=================================")

    # Exception handling: opening an unresolvable host fails with URLError,
    # which is reported instead of aborting the script.
    try:
        unreachable = urllib2.urlopen("http://www.dfdf.dfdfd")
    except urllib2.URLError as e:
        print(e)
    else:
        # The request unexpectedly succeeded; release the connection.
        unreachable.close()
    print("end")

